rm(list = ls())
library(dplyr)
library(ggplot2)
library(lubridate)
library(haven)
library(here)
library(limma)

here::here()

## needed for Venn diagram if limma is not installed
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("limma")


#----------------------------------------------
# Read data
#----------------------------------------------

journ = readxl::read_xlsx('Master Dataset_Journalists Killed.xlsx')
journ$year = year(journ$`Fecha (month-day-year)`)

# cited in 2nd paragraph: mean journalists killed by year pre-2007 vs post 2007
journ %>%
  mutate(year_group = ifelse(`Fecha (month-day-year)` <  "2006-12-11 UTC", 
                             "Before War on Drugs", "After War on Drugs")) %>%
  group_by(year_group) %>%
  summarize(mean_kills_per_year = n() / length(unique(year)))

# plot saving parameters
aspect_ratio <- 2.5
height <- 7


#----------------------------------------------
# MAIN PAPER'S FIGURES
#----------------------------------------------

#### Figure 2
journ %>% count(year) %>% 
  ggplot(aes(x=year, y=n), xlim(1992,2019)) +
  geom_line(color = "#FC4E07", linewidth = 2) +
  scale_x_continuous(breaks = 1992:2019) +
  # ggtitle("Journalists Murdered by Year") +
  labs(y = "Number of Journalists Killed", x = "") +
  theme(axis.text.x=element_text(angle=60, hjust=1)) +
  theme_classic() +
  scale_y_continuous(breaks = 0:15) +
  geom_vline(xintercept = 2005, linetype="dashed", color = "blue", 
             linewidth=1.5) +
  annotate("text", x = 2001, y = 8, 
           label = "Fox launches federal military\ncampaign against drug cartels",
           color = "blue", vjust = 1.2, size = 11) +
  annotate("text", x = 2009.5, y = 3, 
           label = "Calderón launches\nWar on Drugs", color = "blue",
           vjust = 1.2, size = 11) +
  geom_vline(xintercept = 2006.9, linetype="dashed", color = "blue",
             linewidth = 1.5) +
  theme(plot.title = element_text(hjust = 0.5, size=25,face="bold"),
        axis.title.x = element_text(size=24,face="bold"),
        axis.title.y = element_text(size=44,face="bold"),
        axis.title=element_text(size=25,face="bold"),
        axis.text.x.bottom =element_text(size=22,face="bold"),
        axis.text.y.left = element_text(size=30,face="bold",
                                        margin = margin(l = 20)),
        title =element_text(size=5,face="bold"))
ggsave("Figure2.png", dpi = 600,
       height = 12, width = 10 * aspect_ratio)



### Figure 4
type <- journ[, c("Medio internacional", "Medio nacional", "Medio regional", "Medio local")]
n_yes <- data.frame(type = names(type), 
                    total_yes = colSums(type == "1"))
rownames(n_yes) <- c("International","National", "Regional", "Local")
n_yes$type <- c("International", "National", "Regional", "Local")
n_yes$order <- c(1,2,3,4)

# Reorder the 'type' factor with desired levels
n_yes$type <- factor(n_yes$type, levels = c("Local", "Regional", "National", "International"))

# Now, plot the data with the specified order
ggplot(n_yes, aes(x = type, y = total_yes/sum(total_yes), fill = type)) + 
  geom_bar(stat = "identity", width = .5) +
  guides(fill = FALSE) +
  theme_bw() +
 # scale_fill_grey(start = 0, end = .8) +
  theme(plot.title = element_text(hjust = 0.5, size = 25, face = "bold"),
        axis.title.y = element_text(size=37,face="bold"),
        axis.title = element_text(size = 25, face = "bold"),
        axis.text.x.bottom = element_text(size = 27, face = "bold"),
        axis.text.y.left = element_text(size = 27, face = "bold",
                                        margin = margin(l = 20)),
        title = element_text(size = 5, face = "bold")) +
  labs(y = "Percentage of Journalists Killed", x = "") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

ggsave("Figure4.png",
       dpi = 600, height = 12, width = 7 * aspect_ratio)

#### Figure 5
ranking = readxl::read_xlsx('Ranking.xlsx')
ranking$Ranking <- factor(ranking$Ranking, levels = c( "Baja","Media", "Alta"))
levels(ranking$Ranking) <- c(
  "Investigative Journalists/\nReporters/Photojournalists",
  "Newsroom Staff",
  "Editors and Owners") 
ranking %>% 
  count(Ranking) %>%
  mutate(perc = n / nrow(ranking)) %>%
  ggplot(aes(x = Ranking, y = perc, fill = Ranking)) +
  geom_bar(stat = "identity", width = .5) +
  guides(fill = FALSE) +
   scale_fill_grey(start = 0, end = .8) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, size=25,face="bold"),
        axis.title.x = element_text(size=24,face="bold"),
        axis.title.y = element_text(size=37,face="bold"),
        axis.title=element_text(size=25,face="bold"),
        axis.text.x.bottom =element_text(size=27,face="bold"),
        axis.text.y.left = element_text(size=26,face="bold",
                                        margin = margin(l = 20)),
        title =element_text(size=5,face="bold")) +
  labs(y = "Percentage of Journalists Killed", x = "") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), 
                     breaks = seq(0,.5,.1))
ggsave("Figure5.png", dpi = 600, 
       height = 12, width = 7 * aspect_ratio)

#### Figure 6
topics <- journ[, 13:21]
names1 <- c("Politics", "Corruption", "Crime &\nSecurity", "Sports", "Social movements/\n Human rights", "Culture", "International", "Economy", "General")
colnames(topics) <- names1
n_topic <- data.frame(topics = names(topics), 
                      total_yes = colSums(topics == "1"))
ggplot(n_topic, aes(x = reorder(topics, -total_yes), 
                    y = total_yes/sum(total_yes),
                    fill = topics)) + 
  geom_bar(stat = "identity") +
  theme_bw() +
 # scale_fill_grey(start = 0, end = .9) +
  guides(fill=FALSE) +
  theme(plot.title = element_text(hjust = 0.5, size=25,face="bold"),
        axis.title.x = element_text(size=24,face="bold"),
        axis.title.y = element_text(size=44,face="bold"),
        axis.text.y.left = element_text(size = 33, face = "bold",
                                        margin = margin(l = 20)),
        axis.title=element_text(size=25,face="bold"),
        axis.text.x.bottom =element_text(size=22,face="bold"),
        title =element_text(size=5,face="bold")) +
  labs(y = "Percentage of Journalists Killed", x = "") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))
ggsave("Figure6.png", dpi = 600, 
       height = 12, width = 10*aspect_ratio)

#### Figure 7
# Venn Diagram
c3 <- cbind(journ[,13:15]) # 3 main topics of interest
colnames(c3) <- c("Electoral", "Corruption", "Security")
a <- vennCounts(c3)
colnames(a) <- c("Politics", "Corruption", "Crime & Security", "Counts")
pdf("Figure7.pdf", width = 11.91, height = 6.97,
    pointsize = 9)
vennDiagram(a, 
            include=c("up", "down"),
            # counts.col=c("red", "blue"),
            circle.col = c("#619CFF", "#F8766D", "#D39200"),
            show.include = F, lwd = 2)
dev.off()


#----------------------------------------------
# APPENDIX'S FIGURES
#----------------------------------------------
#### Figure A.V.b
# Read state-level data
data.state <- as_tibble(read_dta(paste0("data-state-level.dta", sep = "")))

## Before and After for treated states
d=data.state %>%
  filter(groupdummy==1) %>%
  mutate(state = case_when(
    state == "Nuevo Leon" ~ "Nuevo Leon",
    state == "Michoacan" ~ "Michoacan",
    TRUE ~ state
  ))

d %>%
  ggplot(aes(y = count, x = factor(jointopsyeardummy),
             fill = factor(jointopsyeardummy))) +
  theme_bw() +
  facet_wrap(~state) +
  geom_bar(stat = "identity",
           position=position_dodge(), width = 0.7) +
  labs(y = "Number of Journalists Killed", x = "Timing of Militarization") +
  scale_fill_discrete(name = "Timing", labels = c("Before", "After")) +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, size=25,face="bold"),
        axis.title.x = element_text(size=24,face="bold"),
        axis.title=element_text(size=25,face="bold"),
        axis.text.x.bottom =element_text(size=20),
        axis.text.y.left = element_text(size=20),
        title =element_text(size=25,face="bold"),
        strip.text = element_text(size=20),
        legend.text = element_text(size = 15)) +
  scale_x_discrete(labels = c("Before", "After")) +
  geom_hline(yintercept = 0)

ggsave("FigureA.V.b.png",
       dpi = 600, height = 12, width = 7 * aspect_ratio)



### Read A19 data
# Set the working directory to the correct path

# Read the Excel file -- goes from 2009-2020
a19_aggresor <- readxl::read_excel("a19.xlsx",
                          sheet = "Agresor"); a19_aggresor



#### Figure A.XIX.a
a19_aggresor %>%
  filter(`Tipo de agresor` != "TOTAL") %>%
  mutate(`Perpetrator` = case_when(
    `Tipo de agresor` == "Crimen organizado" ~ "Organized crime",
    `Tipo de agresor` == "Funcionario público" ~ "Public official",
    `Tipo de agresor` == "Particular" ~ "Private individual",
    `Tipo de agresor` == "Partido político" ~ "Political party",
    `Tipo de agresor` == "Se desconoce" ~ "Unknown",
    TRUE ~ `Tipo de agresor`
  )) %>%
  pivot_longer(cols = `2009`:`2020`, names_to = "Year", values_to = "Count") %>%
  group_by(Year) %>%
  mutate(Total_Aggressions = sum(Count), 
         Percentage = (Count / Total_Aggressions) * 100) %>%
  ggplot(aes(x = Year, y = Percentage, fill = `Perpetrator`)) +
  geom_bar(stat = "identity", position = "dodge", color = "black", size = 1) +
  geom_text(aes(label = ifelse(`Perpetrator` == "Public official", 
                               paste("N =", Total_Aggressions), 
                               "")),
            vjust = -1.5, size = 6, fontface = "bold", hjust = 0.5) +
  labs(title = "",
       x = "Year",
       y = "Percentage") +
  scale_fill_brewer(palette = "Set3") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 25, face = "bold"),
    axis.title.x = element_text(size = 34, face = "bold"),
    axis.title = element_text(size = 35, face = "bold"),
    axis.text.x.bottom = element_text(size = 30, face = "bold"),
    axis.text.y.left = element_text(size = 30, face = "bold"),
    title = element_text(size = 15, face = "bold"),
    legend.text = element_text(size = 25),
    legend.title = element_text(size = 30, face = "bold"),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1)
  ) +
  coord_cartesian(ylim = c(0, 70))  # Set y-axis limit

ggsave("Figure A.XIX.a.png",
       width = 18,
       height = 9.5,
       dpi = 600)

#### Figure A.XIX.b
a19_level <- readxl::read_excel("a19.xlsx",
                          sheet = "Nivel"); a19_level

# Extract the second row for percentages
percentages <- a19_level[2, ]

# Convert to long format and order by levels
percentages_long <- percentages %>%
  pivot_longer(cols = everything(), names_to = "Level",
               values_to = "Percentage") %>%
  mutate(Percentage = Percentage * 100) %>% 
  mutate(Level = factor(Level, 
                        levels = c("Federal", "State", "Municipal"))) # Set order

ggplot(percentages_long, aes(x = Level, y = Percentage, fill = Level)) +
  geom_col() +
#  geom_col(fill = "white", color = "blue") +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) + 
  labs(title = "",
       x = "Government Level",
       y = "Percentage") +
  geom_text(aes(label = paste0(round(Percentage,2), "%")), 
            vjust = -0.5, size = 8) +
  theme_bw() + 
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 22), 
    axis.text.y = element_text(size = 22)
  )
ggsave("Figure A.XIX.b.png",
       width = 18,
       height = 9.5,
       dpi = 600)
